## pval_cutoff: 0.05
## lfc_cutoff: 0.585
## low_counts_cutoff: 10
General statistics
# Number of samples
length(counts_data)
## [1] 6
# Number of genes
nrow(counts_data)
## [1] 55487
# Total counts
colSums(counts_data)
## SRR15202006 SRR15202007 SRR15202008 SRR15202009 SRR15202010 SRR15202011
## 12917736 11424047 11549638 11156824 11503014 8835159

Create DDS objects
# Create DESeqDataSet object
dds <- get_DESeqDataSet_obj(counts_data, ~ treatment)
## [1] TRUE
## [1] TRUE
## [1] "DESeqDataSet object of length 55487 with 0 metadata columns"
## [1] "DESeqDataSet object of length 18539 with 1 metadata column"
colData(dds)
## DataFrame with 6 rows and 2 columns
## treatment label
## <factor> <character>
## SRR15202006 control control
## SRR15202007 control control
## SRR15202008 control control
## SRR15202009 diabetes diabetes
## SRR15202010 diabetes diabetes
## SRR15202011 diabetes diabetes
Sample-to-sample comparisons
# Transform data (blinded rlog)
rld <- get_transformed_data(dds)
PCA plot
pca <- rld$pca
pca_df <- cbind(as.data.frame(colData(dds)) %>% rownames_to_column(var = 'name'), pca$x)
summary(pca)
## Importance of components:
## PC1 PC2 PC3 PC4 PC5 PC6
## Standard deviation 8.0565 6.4744 4.5459 3.89603 3.53047 5.833e-14
## Proportion of Variance 0.4184 0.2702 0.1332 0.09784 0.08035 0.000e+00
## Cumulative Proportion 0.4184 0.6886 0.8218 0.91965 1.00000 1.000e+00
ggplot(pca_df, aes(x = PC1, y = PC2, color = label)) +
geom_point() +
geom_text(aes(label = name), position = position_nudge(y = -2), show.legend = F, size = 3) +
scale_color_manual(values = colors_default) +
scale_x_continuous(expand = c(0.2, 0))

Correlation heatmap
pheatmap(
cor(rld$matrix),
annotation_col = as.data.frame(colData(dds)) %>% select(label),
color = brewer.pal(8, 'YlOrRd')
)

Wald test results
# DE analysis using Wald test
dds_full <- DESeq(dds)
colData(dds_full)
## DataFrame with 6 rows and 3 columns
## treatment label sizeFactor
## <factor> <character> <numeric>
## SRR15202006 control control 1.20226969203872
## SRR15202007 control control 1.03012106776167
## SRR15202008 control control 1.02777020748213
## SRR15202009 diabetes diabetes 1.01560091107274
## SRR15202010 diabetes diabetes 1.02814739826445
## SRR15202011 diabetes diabetes 0.765929323076306
# Wald test results
res <- results(
dds_full,
contrast = c('treatment', condition, control),
alpha = pval_cutoff
)
res
## log2 fold change (MLE): treatment diabetes vs control
## Wald test p-value: treatment diabetes vs control
## DataFrame with 18539 rows and 6 columns
## baseMean log2FoldChange lfcSE stat pvalue padj
## <numeric> <numeric> <numeric> <numeric> <numeric> <numeric>
## ENSMUSG00000051951 2.94931283734203 0.276349613156709 1.18546282632121 0.233115376560809 0.815671815032003 NA
## ENSMUSG00000025900 7.98440212267686 -2.14383156244587 3.30769537619857 -0.648134522263569 0.516897947265442 0.948461065747594
## ENSMUSG00000033845 601.186551710163 -0.0207568757425066 0.131724813999565 -0.157577567295521 0.87478968140519 0.997286083185086
## ENSMUSG00000102275 11.3634433371236 0.125323015269519 0.611454702784539 0.204958788768495 0.837604323246014 0.994398645041058
## ENSMUSG00000025903 784.159376871626 -0.334472411905051 0.136968935917382 -2.44195816857918 0.014607839155895 0.225658636308955
## ... ... ... ... ... ... ...
## ENSMUSG00000079808 5.77444040572882 0.886223226362745 0.942944483704387 0.939846662956434 0.347296219445711 NA
## ENSMUSG00000095041 560.881170460927 -0.4406688583868 0.191983785835116 -2.29534414310001 0.0217134097098608 0.28479179385656
## ENSMUSG00000063897 180.316950616363 -0.238715989752614 0.187828115600774 -1.27092788525867 0.20375430501163 0.781834525626438
## ENSMUSG00000096730 6.19649895917884 3.64645982576529 1.25159661203719 2.91344654555277 0.00357463030851782 0.0948451643847944
## ENSMUSG00000095742 342.933478720934 0.0624882360224886 0.165488854149303 0.377597853001702 0.705729359873481 0.991738963103486
mcols(res)
## DataFrame with 6 rows and 2 columns
## type description
## <character> <character>
## baseMean intermediate mean of normalized counts for all samples
## log2FoldChange results log2 fold change (MLE): treatment diabetes vs control
## lfcSE results standard error: treatment diabetes vs control
## stat results Wald statistic: treatment diabetes vs control
## pvalue results Wald test p-value: treatment diabetes vs control
## padj results BH adjusted p-values
summary(res)
##
## out of 18539 with nonzero total read count
## adjusted p-value < 0.05
## LFC > 0 (up) : 233, 1.3%
## LFC < 0 (down) : 174, 0.94%
## outliers [1] : 6, 0.032%
## low counts [2] : 3595, 19%
## (mean count < 6)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
plotDispEsts(dds_full)

Summary details
# Upregulated genes (LFC > 0)
res_sig_df %>% filter(log2FoldChange > 0)
# Downregulated genes (LFC < 0)
res_sig_df %>% filter(log2FoldChange < 0)
# Outliers (pvalue and padj are NA)
res[which(is.na(res$pvalue)), ]
## log2 fold change (MLE): treatment diabetes vs control
## Wald test p-value: treatment diabetes vs control
## DataFrame with 6 rows and 6 columns
## baseMean log2FoldChange lfcSE stat pvalue padj
## <numeric> <numeric> <numeric> <numeric> <numeric> <numeric>
## ENSMUSG00000027360 43.990046279636 2.2637070707949 1.47676415464518 1.53288327298194 NA NA
## ENSMUSG00000066154 12.6198758639578 -6.99247637629702 3.91001848662718 -1.78834867410788 NA NA
## ENSMUSG00000029368 113.599722101597 -6.56853431209121 2.43631878953958 -2.69608983040045 NA NA
## ENSMUSG00000030324 129.61049957254 -1.75077569761033 1.72151833335487 -1.01699509304582 NA NA
## ENSMUSG00000034837 44.1551935863187 -1.47932289298483 1.66892199799825 -0.886394268131867 NA NA
## ENSMUSG00000058207 13.5697821305984 -6.11779937075669 3.3559714078477 -1.82295932451947 NA NA
# Low counts (only padj is NA)
res[which(is.na(res$padj) & !is.na(res$pvalue)), ]
## log2 fold change (MLE): treatment diabetes vs control
## Wald test p-value: treatment diabetes vs control
## DataFrame with 3595 rows and 6 columns
## baseMean log2FoldChange lfcSE stat pvalue padj
## <numeric> <numeric> <numeric> <numeric> <numeric> <numeric>
## ENSMUSG00000051951 2.94931283734203 0.276349613156709 1.18546282632121 0.233115376560809 0.815671815032003 NA
## ENSMUSG00000062588 1.55642329438726 -0.420073611125896 1.64360031557284 -0.255581364365636 0.798274109658747 NA
## ENSMUSG00000097797 2.826381212207 0.0183501154967914 1.19909927206641 0.0153032496343432 0.987790249954279 NA
## ENSMUSG00000076135 1.71864614297385 -1.26137187529182 1.70483530473217 -0.739879020448828 0.459373405414171 NA
## ENSMUSG00000079671 2.74837977779127 3.0128926638373 1.4835384222848 2.03088279924503 0.0422668850511438 NA
## ... ... ... ... ... ... ...
## ENSMUSG00000064356 2.0954888614929 0.416281106718632 1.4434226990867 0.288398614613742 0.773041628080273 NA
## ENSMUSG00000064359 1.8297716866088 1.07753271268951 1.53980084877215 0.699787062430017 0.484060295544053 NA
## ENSMUSG00000094054 3.56162319611929 1.19559954963445 1.14630776613543 1.0430004794133 0.296948070722092 NA
## ENSMUSG00000079190 3.94980632963843 1.37251789058926 1.21922636608154 1.12572851832296 0.260280448736632 NA
## ENSMUSG00000079808 5.77444040572882 0.886223226362745 0.942944483704387 0.939846662956434 0.347296219445711 NA
Shrunken LFC results
plotMA(res)

# Shrunken LFC results
res_shrunken <- lfcShrink(
dds_full,
coef = str_c('treatment_', condition, '_vs_', control),
type = 'apeglm'
)
res_shrunken
## log2 fold change (MAP): treatment diabetes vs control
## Wald test p-value: treatment diabetes vs control
## DataFrame with 18539 rows and 5 columns
## baseMean log2FoldChange lfcSE pvalue padj
## <numeric> <numeric> <numeric> <numeric> <numeric>
## ENSMUSG00000051951 2.94931283734203 0.00193931583011697 0.0987541555042176 0.815671815032003 NA
## ENSMUSG00000025900 7.98440212267686 -0.00159054986258085 0.0990616361468123 0.516897947265442 NA
## ENSMUSG00000033845 601.186551710163 -0.00698353323823402 0.079354205705791 0.87478968140519 0.991083633967551
## ENSMUSG00000102275 11.3634433371236 0.0032346167578079 0.0978750167346095 0.837604323246014 NA
## ENSMUSG00000025903 784.159376871626 -0.211049245634755 0.155818447718305 0.014607839155895 0.205998744663013
## ... ... ... ... ... ...
## ENSMUSG00000079808 5.77444040572882 0.00963515698837386 0.099237022184435 0.347296219445711 NA
## ENSMUSG00000095041 560.881170460927 -0.195557530078333 0.250957456105016 0.0217134097098608 0.259190053310925
## ENSMUSG00000063897 180.316950616363 -0.0587747396353834 0.105999348468791 0.20375430501163 0.736913272088016
## ENSMUSG00000096730 6.19649895917884 2.45647752665413 1.61876698725542 0.00357463030851782 NA
## ENSMUSG00000095742 342.933478720934 0.0167587298375755 0.0863548053931987 0.705729359873481 0.971653150724613
plotMA(res_shrunken)

mcols(res_shrunken)
## DataFrame with 5 rows and 2 columns
## type description
## <character> <character>
## baseMean intermediate mean of normalized counts for all samples
## log2FoldChange results log2 fold change (MAP): treatment diabetes vs control
## lfcSE results posterior SD: treatment diabetes vs control
## pvalue results Wald test p-value: treatment diabetes vs control
## padj results BH adjusted p-values
summary(res_shrunken, alpha = pval_cutoff)
##
## out of 18539 with nonzero total read count
## adjusted p-value < 0.05
## LFC > 0 (up) : 227, 1.2%
## LFC < 0 (down) : 181, 0.98%
## outliers [1] : 6, 0.032%
## low counts [2] : 5390, 29%
## (mean count < 15)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
Summary details
# Upregulated genes (LFC > 0)
res_shrunken_sig_df %>% filter(log2FoldChange > 0)
# Downregulated genes (LFC < 0)
res_shrunken_sig_df %>% filter(log2FoldChange < 0)
# Outliers (pvalue and padj are NA)
res_shrunken[which(is.na(res_shrunken$pvalue)), ]
## log2 fold change (MAP): treatment diabetes vs control
## Wald test p-value: treatment diabetes vs control
## DataFrame with 6 rows and 5 columns
## baseMean log2FoldChange lfcSE pvalue padj
## <numeric> <numeric> <numeric> <numeric> <numeric>
## ENSMUSG00000027360 43.990046279636 0.00870122005285103 0.0995098584917968 NA NA
## ENSMUSG00000066154 12.6198758639578 -0.00202336351705031 0.0991008993961252 NA NA
## ENSMUSG00000029368 113.599722101597 -0.00492877019659501 0.0992504340078792 NA NA
## ENSMUSG00000030324 129.61049957254 -0.00516756591834864 0.0991546719876175 NA NA
## ENSMUSG00000034837 44.1551935863187 -0.00477836826060306 0.0991049466152344 NA NA
## ENSMUSG00000058207 13.5697821305984 -0.00277290094035736 0.0991252987045466 NA NA
# Low counts (only padj is NA)
res_shrunken[which(is.na(res_shrunken$padj) & !is.na(res_shrunken$pvalue)), ]
## log2 fold change (MAP): treatment diabetes vs control
## Wald test p-value: treatment diabetes vs control
## DataFrame with 5390 rows and 5 columns
## baseMean log2FoldChange lfcSE pvalue padj
## <numeric> <numeric> <numeric> <numeric> <numeric>
## ENSMUSG00000051951 2.94931283734203 0.00193931583011697 0.0987541555042176 0.815671815032003 NA
## ENSMUSG00000025900 7.98440212267686 -0.00159054986258085 0.0990616361468123 0.516897947265442 NA
## ENSMUSG00000102275 11.3634433371236 0.0032346167578079 0.0978750167346095 0.837604323246014 NA
## ENSMUSG00000062588 1.55642329438726 -0.00153644335324558 0.0989064794276048 0.798274109658747 NA
## ENSMUSG00000102135 6.88220201879389 -0.00766145994306267 0.0987541652075466 0.526657604608749 NA
## ... ... ... ... ... ...
## ENSMUSG00000064371 14.2207145265809 0.00365998599894141 0.0976245290202735 0.831721905863424 NA
## ENSMUSG00000094054 3.56162319611929 0.00883543510978284 0.0992999978243594 0.296948070722092 NA
## ENSMUSG00000079190 3.94980632963843 0.00889603402142574 0.0993601014499339 0.260280448736632 NA
## ENSMUSG00000079808 5.77444040572882 0.00963515698837386 0.099237022184435 0.347296219445711 NA
## ENSMUSG00000096730 6.19649895917884 2.45647752665413 1.61876698725542 0.00357463030851782 NA
Visualizing results
Heatmaps
# Plot normalized counts (z-scores)
pheatmap(counts_sig_norm[2:7],
color = brewer.pal(8, 'YlOrRd'),
cluster_rows = T,
show_rownames = F,
annotation_col = as.data.frame(colData(dds)) %>% select(label),
border_color = NA,
fontsize = 10,
scale = 'row',
fontsize_row = 10,
height = 20)

# Plot log-transformed counts
pheatmap(counts_sig_log[2:7],
color = rev(brewer.pal(8, 'RdYlBu')),
cluster_rows = T,
show_rownames = F,
annotation_col = as.data.frame(colData(dds)) %>% select(label),
border_color = NA,
fontsize = 10,
fontsize_row = 10,
height = 20)

# Plot log-transformed counts (top 24 DE genes)
pheatmap((counts_sig_log %>% filter(ensembl_gene_id %in% res_sig_df$ensembl_gene_id[1:24]))[2:7],
color = rev(brewer.pal(8, 'RdYlBu')),
cluster_rows = T,
show_rownames = F,
annotation_col = as.data.frame(colData(dds)) %>% select(label),
fontsize = 10,
fontsize_row = 10,
height = 20)

Volcano plots
# Unshrunken LFC
res_df %>%
mutate(
sig_threshold = if_else(
padj < pval_cutoff & abs(log2FoldChange) >= lfc_cutoff,
if_else(log2FoldChange > 0, 'DE-up', 'DE-down'),
'non-DE'
)
) %>%
filter(!is.na(sig_threshold)) %>%
ggplot() +
geom_point(aes(x = log2FoldChange, y = -log10(padj), colour = sig_threshold)) +
scale_color_manual(values = c('blue', 'red', 'gray')) +
xlab('log2 fold change') +
ylab('-log10 adjusted p-value')

# Shrunken LFC
res_shrunken_df %>%
mutate(
sig_threshold = if_else(
padj < pval_cutoff & abs(log2FoldChange) >= lfc_cutoff,
if_else(log2FoldChange > 0, 'DE-up', 'DE-down'),
'non-DE'
)
) %>%
filter(!is.na(sig_threshold)) %>%
ggplot() +
geom_point(aes(x = log2FoldChange, y = -log10(padj), colour = sig_threshold)) +
scale_color_manual(values = c('blue', 'red', 'gray')) +
xlab('log2 fold change') +
ylab('-log10 adjusted p-value')

GSEA (all)
Hallmark genesets
# Shrunken LFC
get_fgsea_res(rank_lfc, mm_h) %>% plot_enrichment_table(rank_lfc, mm_h)

# Wald stat
get_fgsea_res(rank_stat, mm_h) %>% plot_enrichment_table(rank_stat, mm_h)

# Rank: sign(LFC) * -log10(pvalue)
get_fgsea_res(rank_pval, mm_h) %>% plot_enrichment_table(rank_pval, mm_h)

GO biological process
# Shrunken LFC
get_fgsea_res(rank_lfc, mm_c5_bp) %>% plot_enrichment_table(rank_lfc, mm_c5_bp)

# Wald stat
get_fgsea_res(rank_stat, mm_c5_bp) %>% plot_enrichment_table(rank_stat, mm_c5_bp)

# Rank: sign(LFC) * -log10(pvalue)
get_fgsea_res(rank_pval, mm_c5_bp) %>% plot_enrichment_table(rank_pval, mm_c5_bp)

GO cellular component
# Shrunken LFC
get_fgsea_res(rank_lfc, mm_c5_cc) %>% plot_enrichment_table(rank_lfc, mm_c5_cc)

# Wald stat
get_fgsea_res(rank_stat, mm_c5_cc) %>% plot_enrichment_table(rank_stat, mm_c5_cc)

# Rank: sign(LFC) * -log10(pvalue)
get_fgsea_res(rank_pval, mm_c5_cc) %>% plot_enrichment_table(rank_pval, mm_c5_cc)

GO molecular function
# Shrunken LFC
get_fgsea_res(rank_lfc, mm_c5_mf) %>% plot_enrichment_table(rank_lfc, mm_c5_mf)

# Wald stat
get_fgsea_res(rank_stat, mm_c5_mf) %>% plot_enrichment_table(rank_stat, mm_c5_mf)

# Rank: sign(LFC) * -log10(pvalue)
get_fgsea_res(rank_pval, mm_c5_mf) %>% plot_enrichment_table(rank_pval, mm_c5_mf)

GSEA (DE)
Hallmark genesets
# Shrunken LFC
get_fgsea_res(rank_lfc, mm_h) %>% plot_enrichment_table(rank_lfc, mm_h)

# Wald stat
get_fgsea_res(rank_stat, mm_h) %>% plot_enrichment_table(rank_stat, mm_h)

# Rank: sign(LFC) * -log10(pvalue)
get_fgsea_res(rank_pval, mm_h) %>% plot_enrichment_table(rank_pval, mm_h)

GO biological process
# Shrunken LFC
get_fgsea_res(rank_lfc, mm_c5_bp) %>% plot_enrichment_table(rank_lfc, mm_c5_bp)

# Wald stat
get_fgsea_res(rank_stat, mm_c5_bp) %>% plot_enrichment_table(rank_stat, mm_c5_bp)

# Rank: sign(LFC) * -log10(pvalue)
get_fgsea_res(rank_pval, mm_c5_bp) %>% plot_enrichment_table(rank_pval, mm_c5_bp)

GO cellular component
# Shrunken LFC
get_fgsea_res(rank_lfc, mm_c5_cc) %>% plot_enrichment_table(rank_lfc, mm_c5_cc)

# Wald stat
get_fgsea_res(rank_stat, mm_c5_cc) %>% plot_enrichment_table(rank_stat, mm_c5_cc)

# Rank: sign(LFC) * -log10(pvalue)
get_fgsea_res(rank_pval, mm_c5_cc) %>% plot_enrichment_table(rank_pval, mm_c5_cc)

GO molecular function
# Shrunken LFC
get_fgsea_res(rank_lfc, mm_c5_mf) %>% plot_enrichment_table(rank_lfc, mm_c5_mf)

# Wald stat
get_fgsea_res(rank_stat, mm_c5_mf) %>% plot_enrichment_table(rank_stat, mm_c5_mf)

# Rank: sign(LFC) * -log10(pvalue)
get_fgsea_res(rank_pval, mm_c5_mf) %>% plot_enrichment_table(rank_pval, mm_c5_mf)

System Info
sessionInfo()
## R version 3.6.3 (2020-02-29)
## Platform: x86_64-apple-darwin15.6.0 (64-bit)
## Running under: macOS Sierra 10.12.6
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/3.6/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/3.6/Resources/lib/libRlapack.dylib
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] grid parallel stats4 stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] multiMiR_1.8.0 VennDiagram_1.6.20 futile.logger_1.4.3 fgsea_1.12.0 Rcpp_1.0.3 RColorBrewer_1.1-2 pheatmap_1.0.12 DESeq2_1.26.0 SummarizedExperiment_1.16.1 DelayedArray_0.12.3 BiocParallel_1.20.1 matrixStats_0.57.0 Biobase_2.46.0 GenomicRanges_1.38.0 GenomeInfoDb_1.22.1 IRanges_2.20.2 S4Vectors_0.24.4 BiocGenerics_0.32.0 scales_1.1.1 forcats_0.4.0 stringr_1.4.0 dplyr_1.0.2 purrr_0.3.3 readr_1.3.1 tidyr_1.0.0 tibble_3.1.0 ggplot2_3.3.3 tidyverse_1.2.1
##
## loaded via a namespace (and not attached):
## [1] colorspace_1.4-1 ellipsis_0.3.0 htmlTable_1.13.3 XVector_0.26.0 base64enc_0.1-3 rstudioapi_0.10 farver_2.1.0 bit64_0.9-7 mvtnorm_1.1-1 apeglm_1.8.0 AnnotationDbi_1.48.0 fansi_0.4.0 lubridate_1.7.4 xml2_1.2.2 splines_3.6.3 geneplotter_1.64.0 knitr_1.25 Formula_1.2-3 jsonlite_1.6 broom_0.7.5 annotate_1.64.0 cluster_2.1.0 png_0.1-7 compiler_3.6.3 httr_1.4.1 backports_1.1.5 assertthat_0.2.1 Matrix_1.2-18 cli_1.1.0 formatR_1.7 acepack_1.4.1 htmltools_0.5.1.1 tools_3.6.3 coda_0.19-3 gtable_0.3.0 glue_1.4.2 GenomeInfoDbData_1.2.2 fastmatch_1.1-0 bbmle_1.0.23.1 cellranger_1.1.0 jquerylib_0.1.3 vctrs_0.3.4 xfun_0.22 rvest_0.3.5 lifecycle_0.2.0 XML_3.99-0.3 MASS_7.3-51.5 zlibbioc_1.32.0 hms_0.5.2 lambda.r_1.2.4 yaml_2.2.0 memoise_1.1.0 gridExtra_2.3 emdbook_1.3.12 sass_0.3.1 bdsmatrix_1.3-4 rpart_4.1-15 latticeExtra_0.6-29 stringi_1.4.3 RSQLite_2.2.1 genefilter_1.68.0 checkmate_1.9.4 rlang_0.4.8 pkgconfig_2.0.3 bitops_1.0-6 evaluate_0.14 lattice_0.20-38 labeling_0.3 htmlwidgets_1.5.1 bit_1.1-15.1 tidyselect_1.1.0 plyr_1.8.4 magrittr_1.5 R6_2.4.0 generics_0.0.2 Hmisc_4.3-0 DBI_1.1.0 pillar_1.5.1 haven_2.2.0 foreign_0.8-75 withr_2.1.2 survival_3.1-8 RCurl_1.95-4.12 nnet_7.3-12 modelr_0.1.5 crayon_1.3.4 futile.options_1.0.1 utf8_1.1.4 rmarkdown_2.7 jpeg_0.1-8.1 locfit_1.5-9.4 readxl_1.3.1 data.table_1.13.6 blob_1.2.1 digest_0.6.27 xtable_1.8-4 numDeriv_2016.8-1.1 munsell_0.5.0 bslib_0.2.4